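// Edge-detection benchmark: 3x3 kernel {1,1,1,1,-8,1,1,1,1} over a 28x28 image, timed on the CIM array, the CPU, and the RoCC MAC instruction.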
#include "stdio.h"
#include "cim144.h"
//#include "encoding.h"
#include "util.h"

#define I1 784

/*
 * Operand buffer for the RoCC MAC path: nine byte-wide taps of a 3x3 window.
 * `d64` overlays the first eight bytes (d8[0..7]) so they can be handed over
 * as one 64-bit word; d8[8] lies outside `d64` and is handled separately by
 * the code that uses this union.
 */
union Mac_data {
    uint8_t  d8[9];  /* individual window taps, one byte each */
    uint64_t d64;    /* d8[0..7] viewed as a single 64-bit value */
} mac_in;

/*
 * 3x3 edge kernel, stored row by row as nine signed bytes:
 *
 *      1  1  1
 *      1 -8  1
 *      1  1  1
 *
 * `d64` overlays the first eight weights (d8[0..7]) as one 64-bit word;
 * the ninth weight, d8[8], is not covered by `d64`.
 */
union Edge_Weight {
    int8_t   d8[9];  /* kernel taps in row-major order */
    uint64_t d64;    /* d8[0..7] viewed as a single 64-bit value */
} edge_weight = { .d8 = { 1, 1, 1, 1, -8, 1, 1, 1, 1 } };


/*
 * 28x28 input image, one unsigned byte per pixel (values 0..255).
 * The initializer is a flat list of values; C brace elision fills the rows
 * in order, so the list is row-major: img[row][col].
 * NOTE(review): the pixel pattern looks like an MNIST-style handwritten
 * digit -- confirm the origin of the data.
 */
uint8_t img [28][28] = {0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0, 116, 125, 171, 255, 255, 150,  93,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0, 169, 253, 253, 253, 253, 253, 253, 218,  30,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0, 169, 253, 253, 253, 213, 142, 176, 253,253, 122,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,  52, 250, 253, 210,  32,  12,   0,  6, 206, 253, 140,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,  77, 251, 210,  25,   0,  0,   0, 122, 248, 253,  65,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  31,  18,  0,   0,   0,   0, 209, 253, 253,  65,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0, 117, 247, 253, 198,  10,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,  76, 247, 253, 231,  63,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0, 128, 253, 253, 144,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0, 176, 246, 253, 159, 12,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  25, 234, 253,233,  35,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   
0, 198,253, 253, 141,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 78, 248, 253, 189,  12,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,  19, 200, 253, 253, 141,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0, 134, 253, 253, 173,  12,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0, 248, 253, 253,  25,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0, 248, 253, 253,  43,  20,  20, 20,  20,   5,   0,   5,  20,  20,  37, 150, 150, 150, 147,  10,  0,   0,   0,   0,   0,   0,   0,   0,   0, 248, 253, 253, 253,253, 253, 253, 253, 168, 143, 166, 253, 253, 253, 253, 253, 253,253, 123,   0,   0,   0,   0,   0,   0,   0,   0,   0, 174, 253,253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 249, 247, 247,169, 117, 117,  57,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0, 118, 123, 123, 123, 166, 253, 253, 253, 155, 123, 123,  41,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0,   0,   0,   0};
/*
 * Edge-detection micro-benchmark.
 *
 * Applies the same 3x3 window (stride 1, no padding) to the 28x28 global
 * `img` three times and prints the cycle-counter delta of each pass:
 *   1. CIM array    -- ROCC_push / ROCC_mvm / ROCC_save
 *   2. CPU          -- plain C multiply-accumulate against `edge_weight`
 *   3. RoCC MAC     -- RoCC_mac on the packed window in `mac_in`
 * Pixels are reduced to 4 bits (>> 4) before use, and an output pixel is
 * marked 1 when the filter response is positive.
 *
 * NOTE: `img_out` is shared by all three passes and is only ever set to 1,
 * so a later pass cannot clear a pixel set by an earlier one; the buffer is
 * also never read back in this function.
 *
 * Returns 0.
 */
int main(void){
    uint64_t push_rs2, save_rs2, mvm_rs2, mvm_rs1, config;
    uint16_t push_start, push_num, save_start, save_num;
    uint16_t mvm_row_start, mvm_row_num, mvm_col_start, mvm_col_num, mvm_array_index;
    uint8_t  random, wise_num, out_shift, cim_shift;
    int8_t   tmp_out;
    int8_t   cnn1_in [16]        = {0};
    uint8_t  img_out [26][26]    = {0};
    size_t   cycles;

    /* Timing idiom: store -start now, add the end value later => end - start. */
    cycles = -read_csr(cycle);

    //################################## 1. cim144 ##################################

    // Column -> weights notes kept from the original author
    // (presumably the contents pre-programmed into the CIM array -- confirm):
    // 112,[1,1,1,1,-8,1,1,1,1]
    // 113,[-1,-1,-1,0,0,0,1,1,1]
    // 114,[-1,0,1,-1,0,1,-1,0,1]
    // 120,[0,1,1,1,0, 1,3,4,3,1, 1,4,7,4,1, 1,3,4,3,1, 0,1,1,1,0]
    push_start      = 0     ;
    push_num        = 16    ;
    save_start      = 112   ;
    save_num        = 8     ;
    mvm_array_index = 0     ;
    mvm_row_start   = 0     ;
    mvm_row_num     = 9     ;
    mvm_col_start   = 112   ;
    mvm_col_num     = 1     ;
    random          = 0     ;
    wise_num        = 16    ;
    out_shift       = 0     ;
    cim_shift       = 0     ;

    /*
     * Pack the operand words. Each field is widened to uint64_t before it is
     * shifted: the narrow unsigned types would otherwise be promoted to
     * signed int, and the shift would be carried out in 32-bit signed
     * arithmetic instead of the 64-bit destination type.
     */
    push_rs2 = (uint64_t)push_start << 11 | push_num;          //reg: row start, num
    save_rs2 = (uint64_t)save_start << 11 | save_num;          //reg: col start, num
    //mvm: array_index, row_start, col_start
    mvm_rs2  = (uint64_t)mvm_array_index << 22
             | (uint64_t)mvm_row_start   << 11
             | mvm_col_start;
    mvm_rs1  = (uint64_t)mvm_row_num << 16 | mvm_col_num;      //mvm: row_num, col_num
    //conf: random, wise_num, out_shift, cim_shift
    config   = (uint64_t)random    << 23
             | (uint64_t)wise_num  << 16
             | (uint64_t)out_shift << 8
             | cim_shift;
    ROCC_config(config);
    int8_t  cnn1_out[8]         = {0};

    //------------------ 1.1 img2col + mvm ------------------------
    for(int i = 0; i< 28-2;i++){
        for(int j = 0;j<28-2;j++){
            //img2col: flatten the 3x3 window at (i, j) into cnn1_in[0..8]
            cnn1_in[0] = img[i+0][j+0]>>4; cnn1_in[1] = img[i+0][j+1]>>4; cnn1_in[2] = img[i+0][j+2]>>4;
            cnn1_in[3] = img[i+1][j+0]>>4; cnn1_in[4] = img[i+1][j+1]>>4; cnn1_in[5] = img[i+1][j+2]>>4;
            cnn1_in[6] = img[i+2][j+0]>>4; cnn1_in[7] = img[i+2][j+1]>>4; cnn1_in[8] = img[i+2][j+2]>>4;
            ROCC_push((int64_t)cnn1_in ,push_rs2);
            ROCC_mvm(mvm_rs1,mvm_rs2);
            ROCC_save((int64_t)cnn1_out,save_rs2);
            if(cnn1_out[0]>0){
                img_out[i][j] = 1;
            }
        }
    }
    cycles += read_csr(cycle);
    /* cycles is a size_t: print it through an explicit unsigned long so the
     * conversion specifier matches the argument type. */
    printf("cycles: %lu \n",(unsigned long)cycles);
    cycles = -read_csr(cycle);

    //################################## 2. cpu #####################################
    for(int i = 0; i< 28-2;i++){
        for(int j = 0;j<28-2;j++){
            //img2col
            cnn1_in[0] = img[i+0][j+0] >> 4; cnn1_in[1] = img[i+0][j+1]>>4; cnn1_in[2] = img[i+0][j+2]>>4;
            cnn1_in[3] = img[i+1][j+0] >> 4; cnn1_in[4] = img[i+1][j+1]>>4; cnn1_in[5] = img[i+1][j+2]>>4;
            cnn1_in[6] = img[i+2][j+0] >> 4; cnn1_in[7] = img[i+2][j+1]>>4; cnn1_in[8] = img[i+2][j+2]>>4;

            /* Inputs are 0..15 and the weights are eight 1s and one -8, so
             * the sum stays within [-120, 120] and fits in int8_t. */
            tmp_out = 0;
            for(int k=0;k<9;k++){
                tmp_out = tmp_out + cnn1_in[k] * edge_weight.d8[k] ;
            }

            if(tmp_out > 0 ){
                img_out[i][j] = 1;
            }
        }
    }
    cycles += read_csr(cycle);
    printf("cycles: %lu \n",(unsigned long)cycles);
    cycles = -read_csr(cycle);

    //################################## 3. RoCC ####################################
    int64_t mac_rd          = 0;

    for(int i = 0; i< 28-2;i++){
        for(int j = 0;j<28-2;j++){
            //img2col straight into the packed operand
            mac_in.d8[0] = img[i+0][j+0]>>4; mac_in.d8[1] = img[i+0][j+1]>>4; mac_in.d8[2] = img[i+0][j+2]>>4;
            mac_in.d8[3] = img[i+1][j+0]>>4; mac_in.d8[4] = img[i+1][j+1]>>4; mac_in.d8[5] = img[i+1][j+2]>>4;
            mac_in.d8[6] = img[i+2][j+0]>>4; mac_in.d8[7] = img[i+2][j+1]>>4; mac_in.d8[8] = img[i+2][j+2]>>4;
            RoCC_mac(mac_rd,mac_in.d64,edge_weight.d64);
            /* d64 only covers taps 0..7; the ninth tap is added directly,
             * which matches its weight edge_weight.d8[8] == 1. */
            tmp_out = (mac_rd) + (mac_in.d8[8]);
            if(tmp_out> 0){
                img_out[i][j] = 1;
            }
        }
    }
    cycles += read_csr(cycle);
    printf("cycles: %lu \n",(unsigned long)cycles);

    printf("\nfinish\n");
    return 0;
}
